TEXHYF.SAI[TEX,DEK]5 - www.SailDart.org

perm filename TEXHYF.SAI[TEX,DEK]5 blob sn#500220 filedate 1980-04-07 generic text, type C, neo UTF8
COMMENT ⊗   VALID 00008 PAGES
C REC  PAGE   DESCRIPTION
C00001 00001
C00002 00002	begin comment TEX hyphenation routines
C00005 00003	procedure hyphenate(integer u,n)
C00021 00004	Initializing the exception table
C00031 00005	Initializing the suffix table
C00038 00006	Initializing the prefix table
C00044 00007	Initializing the consonant-pair table
C00046 00008	The driver program
C00047 ENDMK
C⊗;
begin comment TEX hyphenation routines;

comment This program was used to test TEX's hyphenation routines before
incorporating them verbatim into  TEX itself. The tables and program may not be
quite up-to-date, TEXPRE and TEXSEM contain the current versions;

comment external procedure bail;

define bitsperwd=36;
require "⊂⊃⊂⊃" delimiters;
define # = ⊂;comment⊃;
define newline = ⊂('15&'12)⊃;
define thru = ⊂step 1 until⊃;
define flag=⊂(1 rot -1)⊃;

integer array mem[0:100];

define fs(f) = ⊂f⊃&"s" # field size of f, in bits;
define fd(f) = ⊂f⊃&"d" # field displacement of f, in bits;

define field(f,x) = ⊂ifc fd(f)=0 thenc ((x) land (2↑fs(f)-1))
	elsec ifc fs(f)+fd(f)≥bitsperwd thenc ((x) lsh -fd(f))
	elsec (((x) lsh -fd(f)) land (2↑fs(f)-1)) endc endc⊃ # field f of x;

define excepsize=373,sufsize=109,prefsize=109,btabsize=30 # hyphenation table sizes;
integer array exceptable[0:excepsize-1] # ordered hash table for exceptional words;
integer array excephyph[1:excepsize-1] # corresponding hyphenation patterns;
integer array suffix[0:sufsize-1] # interpretive commands for suffixes;
integer array prefix[0:prefsize-1] # interpretive commands for prefixes;
integer array btable[2:btabsize+1] # consonant-pair exception table;

procedure confusion;print("confusion");

integer finale # location of final "e" when the suffix routine starts
	(temporarily set to 999999 if the suffix "ed" was just removed);
procedure hyphenate(integer u,n);
begin comment Assuming that mem[u]=0, mem[u+1]=a[i] for 1≤i≤n, mem[u+n+1]=0,
this procedure hyphenates the word a[1]...a[n] by setting mem[u+i]←0 when
a hyphen comes just before a[i], using TEX's hyphenation algorithm;
integer b,c,h,i,j,t,pc;
boolean firsttime;
label hashloop,phase2,sufbegin,interps,falsexx,marksuf,restarts,phase3,checkc,
restartp,interpp,marki,phase4c,vowelscan,phase4v,phase4vc,ertest,phase5,hashsearch;
comment People who don't like go to statements should not read this;

define o(c)=⊂"c" land '37⊃ # five-bit version of ascii character c;

finale←1000000 # infinity;

comment Phase 1. Search exception dictionary (an ordered hash table);
j← 7 min n;
hashsearch: t←mem[u+1];
for i←u+2 thru u+j do t←(t lsh 5)+mem[i];
h←t mod excepsize;
hashloop: while exceptable[h]>t do h←h-1;
if exceptable[h]≠t then
	begin if h then
		begin if j≠n or mem[u+n]≠o(s) then go to phase2;
		j←j-1; go to hashsearch;
		end;
	h←excepsize-1; go to hashloop;
	end;

comment Now the first 7 letters have been found in exceptable[h].
The corresponding hyphenation pattern appears in excephyph[h], but it
may be necessary to check more than seven letters to make sure the exception
applies. Additional letters to check appear at the righthand side of
excephyph[h], in a straightforward manner exhibited by the following code;

t←excephyph[h];
while t land '37 do
	begin comment must check another letter;
	j←j+1;
	if mem[u+j]≠t land '37 then go to phase2;
	t←t lsh -5;
	end;

t←excephyph[h] land(flag ash(2-n)) # leftmost n-1 bits;
i←u+3;
while t do
	begin if t<0 then mem[i]←0;
	t←t lsh 1; i←i+1;
	end;
go to phase5;

comment Phase 2. Interpretive routine for suffix removal. 
The array suffix contains a "program" for a machine with the following
architecture. Instruction words have four fields, namely opcode, truex,
falsex, operand, each 9 bits. There are two registers: the program counter pc and
the character position i. There is also a toggle called firsttime.
Initially i=u+n-1, pc=mem[u+n], firsttime=true.
(Thus we begin by branching on the final character, mem[u+n].) The opcodes
are as follows, using t to stand for the operand field of the instruction:

	scan. If mem[i]=t, decrease i by 1 and go to truex, else go to falsex.
	double. Analogous, but tests if mem[i]=mem[i-1].
	table. Analogous, but tests if mem[i]εsuffix[t], where xεy means that
		word y shifted left x bits has a leading 1 bit.
	check. Analogous, but tests if i>u+3 and does not decrease i.
	success. Sets mem[i+t+1]←0, stops.
	fail. Stops.
	repeat. Sets mem[i+t+1]←0, firsttime←false, i←i+t-1, pc←mem[i+1]. Thus,
		the suffix routine is re-entered before the present suffix.*
	again. If firsttime, sets firsttime←false, i←u+n-2, pc←mem[i+1]. Thus,
		the suffix routine is re-entered with the final character omitted.*
		Otherwise goes to truex.
	mark. If t>0 or firsttime, sets mem[i+t+1]←0. Then goes to truex.
	efail. (Special routine used to omit "ed".) If mem[u+n]="d" and
	mem[u+n-1]="e", sets mem[u+n-1]←0, i←u+n-3, pc←mem[u+n-2]. Otherwise stops.

* Actually the suffix routine is reentered only when i≥u+3;

define opcodes=9,opcoded=27,truexs=9,truexd=18,falsexs=9,falsexd=9,oprands=9,
	oprandd=0 # fields in interpreted instructions;
comment the above uses the fact that bitsperwd=36, much smaller fields would work;

define scan=0,double=1,table=2,check=3,success=4,fail=5,repeat=6,again=7,
	mark=8,efail=9 # numeric equivalents of symbolic opcodes;

phase2: i←u+n-1; firsttime←true;
sufbegin: pc←mem[i+1]; if pc=o(e) then finale←i+1
else if finale=999999 then finale←i+2 else finale←1000000;
interps: case field(opcode,t←suffix[pc]) of begin
[scan] if(mem[i] xor t) land '37 then go to falsexx else i←i-1;
[double]if mem[i]≠mem[i-1] then go to falsexx else i←i-1;
[table] if(suffix[field(oprand,t)]lsh mem[i])≥0 then go to falsexx else i←i-1;
[check] if i≤u+3 then go to falsexx;
[success] begin mem[i+field(oprand,t)+1]←0; go to phase3 end;
[fail] go to phase3;
[repeat] begin i←i+field(oprand,t)-1; go to marksuf end;
[again] if firsttime then begin i←u+n-2; go to restarts end;
[mark] if (j←field(oprand,t)) or firsttime then mem[i+j+1]←0;
[efail] if mem[u+n]=o(d) and mem[u+n-1]=o(e) then
	begin i←u+n-3; finale←999999; go to marksuf;
	end
else go to phase3;
else confusion
  end;
pc←field(truex,t); go to interps;
falsexx: pc←field(falsex,t); go to interps;
marksuf: mem[i+2]←0;
restarts: firsttime←false; if i≥u+3 then go to sufbegin;

comment Phase 3. Interpretive routine for prefix removal. 
The array prefix contains a "program" for a machine with the following
architecture. Instruction words have four fields, namely opcode, truex,
falsex, operand, each 9 bits. There are two registers: the program counter pc and
the character position i. Initially i=u+2 and pc=mem[u+1].
(Thus we begin by branching on the first character, mem[u+1].) The opcodes
are as follows, using t to stand for the operand field of the instruction:

	scan. If mem[i]=t, increase i by 1 and go to truex, else go to falsex.
	repeat. Set i←i-t. If mem[i+1]=0, stop, otherwise set pc←mem[i],
		mem[i]←0, i←i+1.
	mark. If t>0 then set mem[i-t]←0. Also remember the value of mem[i],
		for phase 4, then set mem[i]←0 (unless mem[i+1]=0) and stop.
	table. If mem[i]ε(bit-pattern specified in truex,falsex,oprand fields)
		then do a mark 0, otherwise just stop.
	fail. Stop.
	vow,cons. Stop.

Actually there are four flavors of stopping: One (vow) goes to phase 4 assuming
that mem[i-1] is a vowel, another (cons) goes to phase 4 with mem[i-1] ignored,
the third (fail) omits phase 4 entirely, the last (table when unsuccessful)
goes to phase 4 restarting at the beginning of the word;

define vow=success, cons=again # numeric versions of new opcodes;

phase3: pc←mem[u+1]; i←u+2;
restartp: c←pc; j←i-1;
interpp: case field(opcode,t←prefix[pc]) of begin
[scan]if(mem[i] xor t)land '37 then begin pc←field(falsex,t); go to interpp end
else begin i←i+1; pc←field(truex,t); go to interpp end;
[repeat] begin i←i-field(oprand,t)+1; if mem[i]=0 then go to phase5;
pc←mem[i-1]; mem[i-1]←0; go to restartp end;
[mark] begin if t←field(oprand,t) then mem[i-t]←0; go to marki end;
[table] if t lsh(mem[i]+opcodes)<0 then go to marki
else begin i←j; go to vowelscan end;
[fail] go to phase5;
[vow] go to phase4v;
[cons] go to phase4c;
else confusion
  end;

comment Phase 4. This phase implements the consonant-pairs rule for middle
of words, as explained in the TEX writeup. Basically there are a few
special rules for double consonants and combining ch, gh, ph, sh, th into
single consonants, and then there are exceptional pairs of consonants between
which we will not break. There are two classes of exceptions, strong (like bl)
and weak (like ft). The necessary information is packed in btable, whose
words consist of three fields:

	hchar	specifies code for this character followed by letter h
	weak	specifies address of "weak" exception table for this character
	leading 26 bits, give "strong"∨"weak" exception table

In order to keep hchar and weak to 3-bit fields, their values are encoded in
a straightforward manner that can be deduced by reading the following code;

define hchars=3,hchard=0,weaks=3,weakd=hchars # definition of btable fields;

marki: comment Now mark a permissible hyphen in mem[i] and do phase4 scanning;
if mem[i+1]=0 then go to phase5 # we don't allow only one letter between pref,suf;
c←mem[i]; mem[i]←0; go to vowelscan;
phase4c: c←mem[i];
vowelscan: comment We're looking for a vowel. Now c contains the letter
originally in mem[i], and suffix[0] is a table of vowels (including the null
code 0 as a vowel);
i←i+1; if(suffix[0] lsh c)≥0 then go to phase4c;
checkc: comment Now c is 0 if we've gone too far, else we've found a vowel;
if c=0 then go to phase5;
phase4v: b←mem[i]; i←i+1; if(suffix[0]lsh b)<0 then begin c←b;go to checkc;end;

comment Now b=mem[i-1] is a consonant following a vowel;
phase4vc: c←mem[i];
if b=o(q) and c=o(u) then begin i←i-1; go to marki end;
if(suffix[0] lsh c)<0 then begin i←i+1; go to checkc end;
if b=c then
	begin comment double consonant;
	if c≠o(l) and c≠o(s) then go to marki else
		begin comment ll or ss, check for vowel;
		if (c←mem[i+1])=0 then go to phase5;
		if(suffix[0]lsh c)<0 then go to ertest;
		i←i+2; go to phase4c;
		end;
	end
else if c=o(h) and j←field(hchar,btable[b]) then
	begin comment change ch→e,gh→i,ph→o,sh→u,th→y;
	b←b+j-2; i←i+1; go to phase4vc;
	end
else if c=o(k) and b=o(c) then begin i←i+1; go to marki end;
if mem[i+1]=o(h) and j←field(hchar,btable[c]) then
	begin comment change ch→e, etc., in second consonant position;
	c←c+j-2; j←i+2;
	end
else j←i+1 # Now j points to where we want a vowel;
if mem[j]=0 then go to phase5;
if(suffix[0] lsh mem[j])<0 then
	begin comment vowel-consonant-consonant-vowel found;
	if(btable[b] lsh (c-1))≥0 then go to marki # not an exception;
	if(btable[field(weak,btable[b])+26] lsh(c-1))≥0 then
		begin comment a strong exception;
		i←j+1; go to phase4v;
		end;
 	comment a weak exception; i←j-1;
	if ((mem[i+1]=o(a) and mem[i+2]=o(g) and finale=i+3)
		or (mem[i+1]=o(e) and mem[i+2]=o(s) and mem[i+3]=o(t)))
		and mem[i+4]=0 then go to phase5 else go to ertest;
	end;
comment three consonants in a row found;
i←j+1; go to phase4c;
ertest: if mem[i+1]=o(e) and mem[i+2]=o(r) and mem[i+3]=0
	then go to phase5 else go to marki;

comment Phase 5. We're almost done! Although previous phases may have set mem[u+1]
or mem[u+n-1] or mem[u+n] to zero, we simply ignore this fact as we
output the answer;
phase5:
end;
comment Initializing the exception table;

procedure xent(string s) # enter an exception s;
begin integer n,m,c,w,t,i,j,h; string ss;
ss←s; n←0; w←0; m←0;
while c←lop(s) do
	if c="-" then w←w lor 1 else
	if c="*" then m←m+1 else
	begin n←n+1; w←w lsh 1;
	mem[n] ← c land '37;
	end;
w←w rot(1-n);
j←7 min n;
while m do begin w←w+(mem[j+m]lsh(5*(m-1))); m←m-1 end;
t←mem[1];
for i←2 thru j do t←(t lsh 5)+mem[i];
h←t mod excepsize;
while t do
	begin while exceptable[h]>t do h←h-1;
	if h=0 then h←excepsize-1
	else if exceptable[h]=t then
		begin print(newline,"Whoops: double entry ",ss);
		return;
		end
	else	begin j←exceptable[h]; c←excephyph[h];
		exceptable[h]←t; excephyph[h]←w;
		t←j; w←c;
		end;
	end;
end;

preload_with
"con-trol-lable","un-con-trollable",
"eq-uable","in-sa-tiable","ne-go-tiable","so-ciable","turn-table","un-so-ciable",
"de-pend-ent","in-de-pend-ent",
"any-thing","bal-ding","dar-ling","dump-ling","err-ing","eve-ning","every-thing",
"far-thing","found-ling","ink-ling","main-spring","nest-ling","off-spring",
"play-thing","sap-ling","shoe-string","sib-ling","some-thing","star-ling",
"ster-ling","un-err-ing","up-swing","weak-ling","year-ling",
"civ-i-lize","crys-tal-lize","im-mo-bi-lize","me-ta-bo-lize","mo-bi-lize",
"mo-nop-o-lize","sta-bi-li*ze","tan-ta-lize","un-civ-i-lized",
"pal-ate",
"in-clem-ent",
"bar-on-ess","li-on-ess",
"eu-logy","ped-a-gogy",
"lus-cious",
"at-mos-phere",
"met-al","non-metal","pet-al","postal","rent-al",
"cat-ion",
"com-bat-ive",
"stat-ure",
"beck-on","bes-tial",
"com-a-tose","come-back","co-me-dian","comp-troller",
"cone-flower","co-nun-drum",
"equipped",
"handle-bar",
"inch-worm","ink-blot","inn-keeper",
"in-te-rior",
"min-is-ter","min-is-try",
"none-the-less",
"qua-drille",
"som-er-sault",
"su-pe-rior",
"una-nim-ity","unan-i-mous","unc-tuous",
"debt-or",
"ac-knowl-edge",
"de-duct-i*ble","ex-act-i-tude","in-ex-act-i-tude",
"pre-dict-*able","re-spect-*able","un-pre-dict-able","vict-ual",
"nee-dle-work","idler",
"buff-er","off-beat","off-hand","off-print","off-shoot","off-shore",
"stiff-en",
"left-ist","left-over","lift-off",
"soft-hearted",
"egg-nog","egg-head",
"cognac","for-eign-er","vi-gnette",
"hogs-head",
"child-ish","eld-est","gold-en","hold-out","hold-over","hold-up",
"self-ish",
"bull-ish","crest-fallen","dis-till-*ery","fall-out","lull-aby","roll-away",
"sell-out","wall-eye",
"psalm-ist",
"else-where","false-hood",
"con-sult-ant","volt-age",
"re-solv-able","re-volv-er","solv-able","un-solv-able",
"beach-comb-er","bomb-er","climb-er","plumb-er",
"damp-en","damp-est",
"clinch-er","launch-er","lunch-eon","ranch-er","trench-ant",
"an-nouncer","bouncer","fencer","hence-forth","mince-meat","si-lencer",
"bind-ery","bound-ary","com-mend-*a-*t*ory","de-pend-able","ex-pend-able",
"fiend-ish","land-owner","out-land-ish","round-about","send-off","stand-out",
"change-over","hang-out","hang-over","orange-ade",
"venge-ance",
"ac-count-ant","ant-acid","ant-eater","count-ess","rep-re-sentative",
"ant-hill","pent-house","per-cent-*age",
"ac-cept-able","ac-cept-or","adapt-able","adapt-er","crypt-analysis",
"in-ter-ru*p*t-*i*ble",
"an-tiq-uity","in-eq-uity","in-iq-uity","liq-uefy","liq-uid",
"liq-ui-date","liq-uor","pre-req-ui-site","req-ui-sition",
"ubiq-ui-tous",
"ab-sorb-ent","carb-on","herbal","im-per-turb-able",
"arch-ery","arch-angel","re-search-ers",
"ac-cord-ance","board-er","chordal","hard-en","hard-est","haz-ard-ous",
"jeop-ard-ize","re-cord-er","stand-ard-ize","stew-ard-ess","yard-age",
"surf-er",
"curl-i-que",
"af-firm-a-*t*i*ve","con-form-*ity","de-form-ity","in-form-a*nt","non-con-form-ist",
"cav-ern-ous","dis-cern-ible","mod-ern-ize","turn-about","turn-over",
"un-gov-ern-able","west-ern-ize",
"harp-ist","sharp-en",
"coars-en","ir-re-vers-ible","nurse-maid","nurs-ery","purser","re-hears-al",
"re-vers-ible","wors-en",
"art-ist","con-vert-ible","court-yard","fore-short-en","heart-ache","heart-ily",
"short-en",
"apart-heid","court-house","earth-en-ware","north-east","north-ern","port-hole",
"nerv-ous","ob-serv-a*ble","ob-serv-er","pre-serv-*a-*t*i*ve","serv-er",
"serv-ice-able",
"pre-school",
"con-de-scend","cre-scendo","de-cre-scendo","de-scend-ent","de-scent",
"pleb-i-scite","re-scind","sea-scape",
"askance","snake-skin","whisk-er",
"cole-slaw",
"rattle-snake",
"class-room","class-ify","cross-over","dis-miss-al","ex-press-*i*ble",
"im-pass-able","less-en","pass-able","toss-up","un-class-i-fied",
"ar-mi-stice","astig-ma-tism","astir","aston-ish-ment","blast-off","by-stand-er",
"candle-stick","cast-away","cast-off","con-test-ant","co-star",
"de-test-able","di-gest-ible","east-ern","ex-ist-ence","fore-stall",
"in-con-test-able","in-di-ges*t-*i*ble","in-ex-haust-ible","life-style",
"lime-stone","live-stock","mile-stone","non-ex-ist-ent","per-sist-ent",
"pho-to-stat","re-start-ed","re-state-ment","re-store","shy-ster",
"side-step","smoke-stack","sug-gest-*i*ble","thermo-stat","waste-bas-ket",
"waste-land",
"mast-head","post-hu-mous","priest-hood",
"side-swipe",
"watt-meter",
"be-tween",
"kib-itzer",
"buzz-er",
"al-go-rithm","bib-li-og-raphy","bi-no-mial","cat-e-go-ry",
"cen-ter","com-put-a*bil-ity",
"dec-la-ra-tion","de-gree","de-vel-op-ment",
"es-tab-lish","hap-hazard","neg-li-gible","pe-ri-odic",
"poly-no-mial","pre-vious","pro-ce-dure","prob-able","prob-abil-ity",
"prob-lem-atic","pro-gram-ming","pub-li-ca-tion","pub-lish","ref-er-enc*e",
"re-place-ment","when-ever",
""; string array exceptions[0:excepsize-1];

procedure initex;
begin integer i; string s;
arrclr(exceptable); arrclr(excephyph);
i←0;
while s←exceptions[i] do
	begin xent(s); i←i+1;
	end;
print(newline,"Exception table contains ",i," entries.");
end;
comment Initializing the suffix table;

procedure initsuf;
begin
define opcodes=9,opcoded=27,truexs=9,truexd=18,falsexs=9,falsexd=9,oprands=9,
	oprandd=0 # fields in interpreted instructions;
define scan=0,double=1,table=2,check=3,success=4,fail=5,repeat=6,again=7,
	mark=8,efail=9 # numeric equivalents of symbolic opcodes;

define s(n,a,b,c,d)=⊂suffix[n]←(a lsh opcoded)+(b lsh oprandd)+
	(c lsh truexd)+(d lsh falsexd)⊃;
define t(c)=⊂(flag lsh -("c" land '37))⊃;

suffix[0]←flag+t(a)+t(e)+t(i)+t(o)+t(u)+t(y);
s(1,fail,0,0,0)	# a;
s(2,fail,0,0,0) # b;
s(3,scan,"i",34,1) # c;
s(4,again,0,1,0) # d;
s(5,mark,0,38,0) # e;
s(6,fail,0,0,0) # f;
s(7,scan,"n",60,1) # g;
s(8,fail,0,0,0) # h;
s(9,fail,0,0,0) # i;
s(10,fail,0,0,0) # j;
s(11,fail,0,0,0) # k;
s(12,scan,"a",71,72) # l;
s(13,fail,0,0,0) # m;
s(14,scan,"o",77,1) # n;
s(15,fail,0,0,0) # o;
s(16,fail,0,0,0) # p;
s(17,fail,0,0,0) # q;
s(18,scan,"e",81,1) # r;
s(19,mark,0,85,0) # s;
s(20,scan,"n",94,1) # t;
s(21,fail,0,0,0) # u;
s(22,fail,0,0,0) # v;
s(23,fail,0,0,0) # w;
s(24,fail,0,0,0) # x;
s(25,scan,"l",31,98) # y;
s(26,efail,0,0,0) # z;
s(27,success,0,0,0);
s(28,success,1,0,0);
s(29,success,2,0,0);
s(30,success,3,0,0);
s(31,repeat,0,0,0);
s(32,repeat,1,0,0);
s(33,repeat,2,0,0);
s(34,scan,"p",35,26) # e/ic;
s(35,scan,"o",36,26) # pe/pic;
s(36,scan,"c",37,26) # ope/opic;
s(37,scan,"s",27,26) # cope/copic;
s(38,scan,"l",39,40) # e;
s(39,scan,"b",41,26) # le;
s(40,scan,"t",42,43) # e;
s(41,scan,"a",44,26) # ble;
s(42,scan,"a",45,26) # te;
s(43,scan,"z",46,47) # e;
s(44,scan,"t",48,49) # able;
s(45,table,50,108,26) # ate;
s(46,scan,"i",51,26) # ze;
s(47,scan,"v",52,53) # e;
s(48,table,54,33,26) # table;
s(49,table,107,32,26) # able;
suffix[50]←t(c)+t(l);
s(51,scan,"l",32,26) # ize;
s(52,scan,"i",55,26) # ve;
s(53,scan,"r",56,34) # e;
suffix[54]←t(n)+t(r);
s(55,scan,"t",27,26) # ive/ure;
s(56,scan,"u",55,57) # re;
s(57,scan,"e",58,26) # re;
s(58,scan,"h",59,26) # ere;
s(59,scan,"p",37,26) # here;
s(60,scan,"i",61,1) # ng;
s(61,check,0,62,1) # ing;
s(62,scan,"l",63,64) # ing;
s(63,table,65,27,66) # ling;
s(64,table,67,28,68) # ing;
suffix[65]←t(b)+t(c)+t(d)+t(f)+t(g)+t(p)+t(t)+t(z);
s(66,scan,"k",69,28) # ling;
suffix[67]←t(f)+t(s)+t(z);
s(68,table,0,28,70) # ing;
s(69,scan,"c",29,27) # kling;
s(70,double,0,27,27) # ing;
s(71,scan,"i",73,74) # al;
s(72,scan,"u",75,1) # l;
s(73,scan,"t",27,76) # al/ial;
s(74,scan,"n",14,73) # al;
s(75,scan,"f",31,1) # ul;
s(76,scan,"c",27,1) # al/ial/ient;
s(77,scan,"i",78,1) # on/onal;
s(78,table,79,80,1) # ion/ional;
suffix[79]←t(s)+t(t);
s(80,mark,4,27,0) # sion/sional/tion/tional;
s(81,scan,"h",82,1) # er/y;
s(82,scan,"p",83,1) # her/hy;
s(83,scan,"a",84,1) # pher/phy;
s(84,scan,"r",27,1) # apher/aphy;
s(85,scan,"u",86,87) # s;
s(86,scan,"o",88,4) # us;
s(87,scan,"s",89,4) # s;
s(88,scan,"i",90,4) # ous;
s(89,scan,"e",91,4) # ss;
s(90,scan,"c",92,4) # ious;
s(91,table,93,31,4) # ess;
s(92,scan,"s",27,27) # cious;
suffix[93]←t(l)+t(n);
s(94,scan,"e",95,1) # nt;
s(95,scan,"m",31,96) # ent;
s(96,scan,"d",27,97) # ent;
s(97,scan,"i",76,1) # ent;
s(98,scan,"g",99,100) # y;
s(99,scan,"o",27,1) # gy;
s(100,scan,"r",101,81) # y;
s(101,scan,"a",102,1) # ry;
s(102,scan,"n",103,1) # ary;
s(103,scan,"o",104,105) # nary;
s(104,scan,"i",106,28) # onary;
s(105,scan,"e",29,27) # nary;
s(106,repeat,3,0,0) # ionary;
suffix[107]←t(e)+t(h)+t(i)+t(k)+t(l)+t(o)+t(u)+t(v)+t(w)+t(x)+t(y);
s(108,table,0,28,26) # cate/late;
end;
comment Initializing the prefix table;

procedure initpref;
begin
define opcodes=9,opcoded=27,truexs=9,truexd=18,falsexs=9,falsexd=9,oprands=9,
	oprandd=0 # fields in interpreted instructions;
define scan(n,c,t,f)=⊂prefix[n]←"c"+(t lsh truexd)+(f lsh falsexd)⊃;
define repeat(n,t)=⊂prefix[n]←(6 lsh opcoded)+t⊃;
define mark(n,t)=⊂prefix[n]←(8 lsh opcoded)+t⊃;
define table(n)=⊂prefix[n]←(2 lsh opcoded)⊃;
define fail(n)=⊂prefix[n]←5 lsh opcoded⊃;
define vow(n)=⊂prefix[n]←4 lsh opcoded⊃;
define cons(n)=⊂prefix[n]←7 lsh opcoded⊃;
define t(c)=⊂(flag lsh -(("c" land '37)+opcodes))⊃;
define vs=1,cs=6,ts=7 # locations where there is a "vow","cons","table0" inst;

fail(0) # in case mem[u+1] gets set to zero by the suffix routine;
vow(1) # a;
scan(2,e,34,cs) # b;
scan(3,o,36,cs) # c;
scan(4,i,38,cs) # d;
scan(5,q,41,44) # e;
cons(6) # f;
table(7) # g;
scan(8,a,45,47) # h;
scan(9,m,27,55) # i;
cons(10) # j;
cons(11) # k;
scan(12,e,61,cs) # l;
scan(13,a,63,70) # m;
scan(14,o,76,cs) # n;
scan(15,u,77,78) # o;
scan(16,s,81,cs) # p;
scan(17,u,85,cs) # q;
cons(18) # r;
scan(19,e,87,89) # s;
scan(20,h,97,99) # t;
scan(21,n,106,vs) # u;
cons(22) # v;
cons(23) # w;
cons(24) # x;
vow(25) # y;
cons(26) # z;
repeat(27,0);
repeat(28,1);
repeat(29,2);
mark(30,0);
mark(31,1);
mark(32,2);
mark(33,3);
table(34)+t(c)+t(h)+t(s)+t(w) # be;
scan(35,i,vs,27) # un;
scan(36,m,30,37) # co;
scan(37,n,30,vs) # co;
scan(38,s,39,vs) # di;
scan(39,h,ts,40) # dis;
scan(40,y,vs,27) # dis;
scan(41,u,42,cs) # eq;
scan(42,i,43,cs) # equ;
scan(43,v,30,30) # equi;
scan(44,x,30,vs) # e;
scan(45,n,46,vs) # ha;
scan(46,d,30,ts) # han;
scan(47,o,48,51) # h;
scan(48,r,49,vs) # ho;
scan(49,s,50,ts) # hor;
scan(50,e,30,ts) # hors;
scan(51,y,52,cs) # h;
scan(52,p,53,vs) # hy;
scan(53,e,54,ts) # hyp;
scan(54,r,33,vs) # hype;
scan(55,n,56,vs) # i;
scan(56,t,57,27) # in;
scan(57,e,58,59) # int;
scan(58,r,33,29) # inte;
scan(59,r,60,28) # int;
scan(60,o,33,29) # intr;
scan(61,x,62,vs) # le;
scan(62,i,31,ts) # lex/max/min;
scan(63,c,64,66) # ma;
scan(64,r,65,ts) # mac;
scan(65,o,32,ts) # macr;
scan(66,t,67,69) # ma;
scan(67,h,68,ts) # mat;
scan(68,e,31,ts) # math;
scan(69,x,62,vs) # ma;
scan(70,i,71,72) # m;
scan(71,n,62,vs) # mi;
scan(72,u,73,cs) # m;
scan(73,l,74,vs) # mu;
scan(74,t,75,ts) # mul;
scan(75,i,32,ts) # mult;
scan(76,n,27,vs) # no;
scan(77,t,30,vs) # ou;
scan(78,v,79,vs) # o;
scan(79,e,80,ts) # ov;
scan(80,r,27,vs) # ove;
scan(81,e,82,cs) # ps;
scan(82,u,83,vs) # pse;
scan(83,d,84,vs) # pseu;
scan(84,o,32,ts) # pseud;
scan(85,a,86,cs) # qu;
scan(86,d,30,vs) # qua;
scan(87,m,88,vs) # se;
scan(88,i,30,ts) # sem;
scan(89,o,90,92) # s;
scan(90,m,91,vs) # so;
scan(91,e,30,ts) # som/ther;
scan(92,u,93,cs) # s;
scan(93,b,30,94) # su;
scan(94,p,95,vs) # su;
scan(95,e,96,ts) # sup;
scan(96,r,33,vs) # supe;
scan(97,e,98,cs) # th;
scan(98,r,91,vs) # the;
scan(99,r,100,cs) # t;
scan(100,a,101,104) # tr;
scan(101,n,102,vs) # tra;
scan(102,s,103,ts) # tran;
table(103)+t(a)+t(f)+t(g)+t(l)+t(m) # trans;
scan(104,i,105,cs) # tr;
table(105)+t(a)+t(f)+t(u) # tri;
scan(106,d,107,35) # un;
scan(107,e,108,28) # und;
scan(108,r,33,29) # unde;
end;
comment Initializing the consonant-pair table;

procedure initb # sets btable;
begin
define hchars=3,hchard=0,weaks=3,weakd=3 # definition of btable fields;
define t(c)=⊂(flag lsh -(("c" land '37)-1))⊃;
define weak(n)=⊂(n lsh weakd)+btable[26+n]⊃;
define b(n)=⊂btable[n]←0⊃;

b(26) # weak(0) and z;
b(27)+t(t) # weak(1), for f and s;
b(28)+t(d) # weak(2), for l;
b(29)+t(p) # weak(3), for m;
b(30)+t(d)+t(g)+t(s)+t(t) # weak(4), for n;
b(31)+t(g)+t(m)+t(n)+t(t) # weak(5), for r;

b(2)+t(l)+t(r) # b;
b(3)+t(l)+t(r)+4 # c;
b(4)+t(g)+t(r) # d;
b(5)+t(l)+t(r) # ch;
b(6)+t(l)+t(r)+weak(1) # f;
b(7)+t(l)+t(r)+4 # g;
b(8) # h;
b(9)+t(t) # gh;
b(10) # j;
b(11)+t(n) # k;
b(12)+t(k)+t(q)+weak(2) # l;
b(13)+weak(3) # m;
b(14)+t(e)+t(k)+t(x)+weak(4) # n;
b(15)+t(r) # ph;
b(16)+t(l)+t(r)+1 # p;
b(17) # q;
b(18)+t(k)+weak(5) # r;
b(19)+t(p)+t(q)+weak(1)+4 # s;
b(20)+t(e)+t(r)+7 # t;
b(21) # sh;
b(22) # v;
b(23)+t(h)+t(l)+t(n)+t(r) # w;
b(24) # x;
b(25)+t(r) # th;
end;
comment The driver program;

integer u,n,c; string s,ss;

initex;initsuf;initpref;initb;

u←3;
while true do
	begin print(newline,": ");
	s←inchwl;
	if s=0 then done;
	ss←s; n←0; mem[u]←0;
	while c←lop(ss) do begin n←n+1; mem[u+n]←c land '37; end;
	mem[u+n+1]←0;
	if n<4 then print(s) else
		begin hyphenate(u,n);
		mem[u+1]←mem[u+2]←mem[u+n-1]←mem[u+n]←1;
		n←0; while c←lop(s) do
			begin n←n+1;
			if mem[u+n]=0 and(u+n+2<finale or u+n>finale)
			then print("-"&c) else print(null&c);
			end;
		end;
	end;
end